# -------------------------------------------------------------------
# Purpose: Creates Figure B6
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate all panels of the figure
# -------------------------------------------------------------------
# Check that required paths exist
stopifnot(dir.exists(pdataraw))
stopifnot(dir.exists(pdataanalysis))
stopifnot(dir.exists(pdataconfanalysis))
stopifnot(dir.exists(poutputappendix))

p_unload(tidytable)

# Load data
load(file.path(pdataanalysis, "countyLevel19001940.RData"))
immigr <- fread(file.path(pdataconfanalysis, "immigrationNameLevel1900BoundariesPanel18801930.csv"))
load(file.path(pdataraw, "US_tl2000_cnty_shp_1790_2000.Rdata"))
load(file.path(pdataraw, "US_tl2000_state_shp_1790_2000.Rdata"))

## to address warning about old-style crs
st_crs(shp2000[["1900"]]) <- st_crs(shp2000[["1900"]])
st_crs(state_shp2000[["1900"]]) <- st_crs(state_shp2000[["1900"]])


# Compute county-level immigration shares
temp <- unique(immigr[, .(year, gisjoin_1900, I_d_adjp, I_adjp)])
temp[, share_I_d_adjp := I_d_adjp / I_adjp]


# Get counties included in analysis
cnty <- unique(countyLevel19001940$gisjoin_1900)


# expand to all years
years <- c(1880, seq(1895, 1930, by = 5))
expanded_data <- expand.grid(gisjoin_1900 = cnty, year = years)
setDT(expanded_data)

# add immigration shares
expanded_data <- merge(expanded_data, temp, all.x = TRUE)
expanded_data[is.na(share_I_d_adjp), share_I_d_adjp := 0]


# residualize
expanded_data[, share_I_d_adjp_r := residuals(feols(share_I_d_adjp ~ 1 | gisjoin_1900 + year, data = expanded_data))]


## Merge data
dfplot <- shp2000[["1900"]] %>%
        filter(ICPSRSTI != 82, ICPSRSTI != 81, ICPSRSTI != 0) %>%
        select(gisjoin_1900 = GISJOIN, geometry) %>%
        rmapshaper::ms_simplify(keep = 0.05, keep_shapes = TRUE) %>% 
        left_join(expanded_data)

state1900 <- state_shp2000[["1900"]] %>%
        filter(ICPSRST != 82, ICPSRST != 81) %>%
        select(geometry) %>%
        rmapshaper::ms_simplify(keep = 0.05, keep_shapes = TRUE)


# Find counties with NA years
missing_years <- dfplot %>% filter(is.na(year))
if(nrow(missing_years) > 0) {
        all_years <- c(1880, seq(1895, 1930, by = 5))
        missing_years_expanded <- map_dfr(all_years, function(yr) {
                missing_years_copy <- missing_years
                missing_years_copy$year <- yr
                return(missing_years_copy)
        })
        dfplot <- rbind(dfplot %>% filter(!is.na(year)), missing_years_expanded)
}


# bin data
dfplot$share_I_d_adjp_r_cut <- cut_number(dfplot$share_I_d_adjp_r, 200)


# define color palette
gnbu_palette <- colorRampPalette(RColorBrewer::brewer.pal(9, "GnBu"))(200)


# create plot
plotname <- file.path(poutputappendix, "figureB06.pdf")
p <- ggplot() +
        geom_sf(data = dfplot, aes(fill = share_I_d_adjp_r_cut), lwd = 0, colour = NA) +
        facet_wrap(~year, ncol = 3, labeller = as_labeller(c("1880" = "Pre 1880", "1895" = "1881-1895", "1900" = "1896-1900", "1905" = "1901-1905", "1910" = "1906-1910", "1915" = "1911-1915", "1920" = "1916-1920", "1925" = "1921-1925", "1930" = "1926-1930"))) +
        coord_sf(crs = st_crs(dfplot), datum = NA) +
        scale_fill_manual(values = gnbu_palette, na.value = "grey") +
        geom_sf(data = state1900, fill = NA, colour = "black", size = 0.1) +
        coord_sf(crs = st_crs(state1900), datum = NA) +
        ggthemes::theme_map() +
        theme(legend.position = "none")
ggsave(plotname, p, width = 8, height = 4.6)

cat("Figure saved to:", plotname, "\n")
p_load(tidytable)
